# template elements
2018-02-13 Bobae Kang (Bobae.Kang@illinois.gov)
Source: Pixabay.com
Source: r-project.org
“R is a language and environment for statistical computing and graphics.” - The R Foundation
Source: Time Magazine
# peek at the first six rows of the data (head() shows 6 rows by default)
head(ispcrime_tbl)
# A tibble: 6 x 12
year county viol~ murd~ rape robb~ aggA~ prop~ burg~ larc~ MVTft arson
<int> <fctr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2011 Adams 218 0 37 15 166 1555 272 1241 36 6
2 2011 Alexa~ 119 0 14 4 101 290 92 183 11 4
3 2011 Bond 6 1 0 0 5 211 58 147 5 1
4 2011 Boone 59 0 24 8 27 733 152 563 14 4
5 2011 Brown 7 0 1 0 6 38 14 22 1 1
6 2011 Bureau 42 0 4 3 35 505 90 405 8 2
# get a quick summary of each column: min/quartiles/mean/max for the
# numeric columns, level counts for the county factor, and the number
# of missing values (NA's) where there are any
summary(ispcrime_tbl)
year county violentCrime murder
Min. :2011 Adams : 5 Min. : 0 Min. : 0.000
1st Qu.:2012 Alexander: 5 1st Qu.: 19 1st Qu.: 0.000
Median :2013 Bond : 5 Median : 42 Median : 0.000
Mean :2013 Boone : 5 Mean : 501 Mean : 7.026
3rd Qu.:2014 Brown : 5 3rd Qu.: 133 3rd Qu.: 1.000
Max. :2015 Bureau : 5 Max. :33348 Max. :566.000
(Other) :480 NA's :7 NA's :7
rape robbery aggAssult propertyCrime
Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0
1st Qu.: 1.00 1st Qu.: 0.0 1st Qu.: 15.0 1st Qu.: 133
Median : 6.00 Median : 2.0 Median : 33.0 Median : 349
Mean : 41.29 Mean : 172.3 Mean : 280.4 Mean : 2913
3rd Qu.: 22.00 3rd Qu.: 13.0 3rd Qu.: 102.0 3rd Qu.: 1190
Max. :1986.00 Max. :16095.0 Max. :15129.0 Max. :178902
NA's :7 NA's :7 NA's :7 NA's :7
burglary larcenyTft MVTft arson
Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.00
1st Qu.: 35.5 1st Qu.: 85.5 1st Qu.: 3.0 1st Qu.: 1.00
Median : 79.0 Median : 258.0 Median : 10.0 Median : 2.00
Mean : 589.3 Mean : 2084.9 Mean : 215.2 Mean : 23.45
3rd Qu.: 268.0 3rd Qu.: 852.0 3rd Qu.: 30.0 3rd Qu.: 8.50
Max. :38485.0 Max. :116145.0 Max. :22879.0 Max. :1418.00
NA's :7 NA's :7 NA's :7 NA's :7
# 2015 snapshot of the counties whose name begins with "C":
# add a total crime count (violent + property) and show only that
# new column next to year and county; the total is NA wherever
# either component count is missing
ispcrime_tbl %>%
  filter(year == 2015, startsWith(as.character(county), "C")) %>%
  mutate(totalCrime = violentCrime + propertyCrime) %>%
  select(year, county, totalCrime)
# A tibble: 12 x 3
year county totalCrime
<int> <fctr> <int>
1 2015 Calhoun NA
2 2015 Carroll 176
3 2015 Cass 154
4 2015 Champaign 6486
5 2015 Christian 292
6 2015 Clark 103
7 2015 Clay 191
8 2015 Clinton 423
9 2015 Coles 805
10 2015 Cook 153575
11 2015 Crawford 282
12 2015 Cumberland 42
# get the annual average count of total crime (violent + property) by county
# - sum(..., na.rm = TRUE) skips missing counts instead of returning NA
# - the divisor is the number of years on record for each county
#   (five, 2011-2015, in this data) rather than a hard-coded 5
# NOTE: years whose counts are NA still count toward the divisor, so the
# average is understated for counties with missing years
ispcrime_tbl %>%
  group_by(county) %>%
  summarise(
    annualAvgCrime =
      sum(violentCrime, propertyCrime, na.rm = TRUE) / n_distinct(year)
  )
# A tibble: 102 x 2
county annualAvgCrime
<fctr> <dbl>
1 Adams 1724
2 Alexander 385
3 Bond 190
4 Boone 426
5 Brown 39.0
6 Bureau 480
7 Calhoun 13.8
8 Carroll 196
9 Cass 109
10 Champaign 6567
# ... with 92 more rows
# line plot of the violent crime trend over the years, one line per county
# (the colour legend is hidden because there are 102 counties)
ggplot(ispcrime_tbl, aes(x = year, y = violentCrime, color = county)) +
  geom_line() +
  guides(color = "none") +
  labs(title = "Violent crime trend by county")